This is the code that obtains the Tweets for analysis.

#load these packages
x <- c('twitteR', 'ggplot2', 'dplyr', 'purrr','ROAuth', 'httr', 'base64enc', 'tm')
lapply(x, require, character.only = T)

#establish credentials
download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")

#Credentials have been altered
#Credentials can be obtained from https://developer.twitter.com/
setup_twitter_oauth(consumer_key='"FEDNBJKXZAWLPCTQVGIRHMOUS"',
                    consumer_secret='YRQIIJUCUEWUPZTPWFIDTEPXUUGGFJXZKSEXTXLWTVMIPZYVBI',
                    access_token = "UIRWZFUEAUYRLSBSICBDTQKUZCDTWAYLCWMCDOHGVIBJGSFMKQ",
                    access_secret = "LBSVUGJNJFXRGEUQUENPLWHBDRWRMDMFXXAHBIHGCCSGK")

#Obtain the tweets
tweets <- searchTwitter('', n = 500000, geocode = '35.846135,-86.393137,50mi', since = '2017-10-27', until = '2017-10-29')
tweets_df <- tbl_df(map_df(tweets, as.data.frame))

This processess the Tweets obtained during the analysis.

#load packages
x <- c("readr", "stringr", "plyr", "tm", "stringi", "stringr", "tm", "RCurl")
lapply(x, require, character.only = T)

#read data and remove unneccesary variables
url <- "https://raw.githubusercontent.com/twitter260/twitter260.github.io/master/our_code/murfreesboro102817.csv"
mbo <- read.csv(url(url), stringsAsFactors = FALSE)

#remove variables personal to Daniel Briggs
mbo$favorited <- NULL
mbo$statusSource <- NULL
mbo$retweeted <- NULL

#identifies all english stopwords
Stopwords <- stopwords(kind = "en")

#user defined function for easy cleaning
'%!in%' <- function(x,y)!('%in%'(x,y))

#what we will clean
mbo[,1] <- tolower(mbo[1:dim(mbo)[1],1])

#remove stop words
tweets <- unlist(lapply(mbo[,1], function(tweet) {
    text <- unlist(strsplit(tweet, " "))
    text <- text[text %!in% Stopwords]
    tweet <- paste(text, collapse = " ")
}))

#removes URLS
replace_reg <- "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https"

for(i in 1:length(mbo[,1])){
  tweet <-tweets[i]
  tweet <- sub("rt ", "", tweet) #remove retweet 
  tweet <- gsub("@\\w+", "", tweet) # remove at(@)
  tweet <- gsub("&lt;3","",tweet) #removes ASCII hearts <3 
  tweet <- gsub("&lt;|&gt;|&le;|&ge;","",tweet) #removes html <, >, <=, >=
  tweet <- str_replace_all(tweet ,replace_reg, "")  # remove links https 
  tweet <- gsub("[ |\t]{2,}", " ", tweet) # remove tabs 
  tweet <- iconv(tweet, "latin1", "ASCII", sub="") #makes emojis readable 
  tweet <- gsub("<[^>]+>", "", tweet) #removes remaining text from emojis
  tweet <- gsub('[[:punct:] ]+',' ',tweet) #removes punctuation
  tweet <- gsub("[\r|\n|\t|\v|\f]", "", tweet) #removes form feeds tabs etc
  tweet <- gsub("^ ", "", tweet)  # remove blank spaces at the beginning
  tweet <- gsub(" $", "", tweet) # remove blank spaces at the end
  mbo[i,1] <- tweet
}

We modify the time at which the Tweet was created.

myRound <- function (x, convert = TRUE)  {
  as.Date(x)
  x <- as.POSIXlt(x)
  mins <- x$min
  mult <- mins %/% 15
  remain <- mins %% 15
  if(remain > 7L || (remain == 7L && x$sec > 29))
    mult <- mult + 1
  if(mult > 3) {
    x$min <- 0
    x <- x + 3600
  } else {
    x$min <- 15 * mult
  }
  x <- trunc.POSIXt(x, units = "mins")
  if(convert) {
    x <- format(x, format = "%Y-%m-%d %H:%M")
  }
  x
}

We perform the sentiment analysis in this code

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidytext)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1.9000     v readr   1.1.1     
## v tibble  1.3.4          v purrr   0.2.4     
## v tidyr   0.7.2          v stringr 1.2.0     
## v ggplot2 2.2.1.9000     v forcats 0.2.0
## -- Conflicts ----------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stringr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(knitr)
library(chron)
## Warning: package 'chron' was built under R version 3.4.3
## 
## Attaching package: 'chron'
## The following objects are masked from 'package:lubridate':
## 
##     days, hours, minutes, seconds, years
data("stop_words")

url <- "https://raw.githubusercontent.com/twitter260/twitter260.github.io/master/our_code/text_identities.csv"
tweets <- read.csv(url(url), stringsAsFactors = FALSE)

tweet_text <- tweets %>%
  distinct(created, screenName, text) %>%
  mutate(created = ymd_hms(created))

#Seperates tweets into one line per word

tokens <- unnest_tokens(tweets, word, text, to_lower = TRUE) %>%
  anti_join(stop_words)
## Joining, by = "word"
# each sentiment library provides a different method of assigning sentiment to a word

#nrc assigns multiple sentiment to each word
nrc_sentiments <- get_sentiments("nrc")

#afinn assigns numeric value form -5 to 5 (-5 being most negative sentiment)
afinn_sentiments <- get_sentiments("afinn")

#Calculates mean sentiment by tweet

tweets_afinn_sentiment <- tokens %>%
  inner_join(afinn_sentiments, by = "word") %>%
  group_by(screenName, created) %>%
  mutate(tweet_sentiment = mean(score),
         day = "", 
         day = replace(day, str_sub(created, start = 9, end = 10) == "29", "Sunday"),
         day = replace(day, str_sub(created, start = 9, end = 10) == "28", "Saturday"),
         day = replace(day, str_sub(created, start = 9, end = 10) == "27", "Friday"),
         minutes_15 = round(60 * 24 * as.numeric(times(str_sub(created, start = -8, end = -1))), 0),
         minutes_15 = trunc(minutes_15/15)) %>%
  ungroup() %>%
  mutate(created = ymd_hms(created)) %>%
  select(created, id, screenName, retweetCount, tweet_sentiment, day, minutes_15, replyToSN)


# Rounding time of tweet to the nearest 15 minutes

tweets_afinn_sentiment$round_qhr <- round_qhr <- as.POSIXct(round(as.double(tweets_afinn_sentiment$created)/(15*60))*(15*60), origin=(as.POSIXct('1970-01-01')))


#sample of most negative tweets

tweets_afinn_sentiment %>%
  distinct(created, screenName, tweet_sentiment) %>%
  inner_join(tweet_text, by = c("created", "screenName")) %>%
  arrange(tweet_sentiment) %>%
  head(20) %>%
  kable()
created screenName tweet_sentiment text
2017-10-29 23:21:40 shadesdad -5 bitch nascaronnbcsn
2017-10-29 22:42:49 caitlinwixted -5 bitch son go sit potty
2017-10-29 21:52:10 TiffanyWhi40 -5 oregon slut curvygirl courtesan tugjob masturbate incall cumslut teens snapaddme instababy
2017-10-29 20:38:20 AmandaJon39 -5 connecticut domination asian datingadvice lesbo slut skypesex webcamsex fisting snapme undressed
2017-10-29 19:33:37 Leek__Sosa -5 niggas artificial prolly never touched pistol nigga hoe
2017-10-29 17:41:23 MarianneNes38 -5 nevada pussyfuck threesome adultsingles slut hotsluts textchat camgirl facials whatsapp play
2017-10-29 16:49:22 JUICYASSTIYE -5 aspire baddest bitch next year
2017-10-29 16:43:37 donna1986_donna -5 hollywood go half dressed kids see dont bitch that are
2017-10-29 16:11:30 TishaGon34 -5 maryland 3some legs cim fisting slut skypeshow chaturbate nudes kikgirl sun
2017-10-29 14:30:35 alexvxis -5 bitch im next
2017-10-29 13:02:21 TNvolsBoy -5 sorry son bitch gone
2017-10-29 07:50:12 yddammmaddy -5 just kidding bitch
2017-10-29 05:28:49 taebaeb__ -5 bitch
2017-10-29 05:22:49 KadianPat36 -5 missouri cunt masturbate tinder cumshot hotwife phonesex nudecam feet sext instamoment
2017-10-29 05:17:24 MistyTur44 -5 alaska cunt uniform erotic doggy sluts incall webcammodel creampie sextalk selfshot
2017-10-29 04:47:07 sab_rin_aaa -5 fr bitch
2017-10-29 04:40:12 summerlayne5 -5 in girl world halloween 1 day year girl can dress like total slut girls can say anything else it
2017-10-29 04:17:22 wildkountrykat -5 muelleriscoming buckle bitches
2017-10-29 03:41:58 rissniicole -5 bitch feel good
2017-10-29 03:38:40 taxo_gang -5 back bitch series tied 2 2 dodgernation
##Big players for network analysis

network_stars <-  c("BigBoyVol", "BlueRaiderDJ", "Limbaugh2016", "ChadCaldwell24", 
                    "dennisbrucemor1", "realDonaldTrump", "DeLoachJW", "jdbswim",
                    "mikerapp", "ColeenC123", "cbrentv3", "BlakeShelton193",
                    "justasking3time")


#filter tweets for networks of big players, create network identifier

network_stars_tweets <- tweets_afinn_sentiment %>%
  filter(replyToSN %in% network_stars | screenName %in% network_stars) %>%
  mutate(network = ifelse(replyToSN %in% network_stars, replyToSN, NA),
         network = ifelse(is.na(network), screenName, replyToSN))

#Plots

# All tweets sentiment over time (saturday)


saturday <-  tweets_afinn_sentiment %>%
  filter(created < as.POSIXct("2017-10-28 17:30:00") & created > as.POSIXct("2017-10-28 02:00:00")) %>%
  group_by(day, minutes_15) %>%
  mutate(tweet_sentiment = mean(tweet_sentiment),
         n = n(),
         Positive = (tweet_sentiment > 0)) %>%
  filter(day == "Saturday") %>%
  ggplot(aes(x = round_qhr, y = tweet_sentiment)) +
  geom_point(aes(size = n, color = Positive), alpha = 0.75) +
  geom_smooth(color = "orange", linetype = "dotted") +
  scale_x_datetime(labels = date_format("%H:%M"), breaks = date_breaks("2 hour")) +
  scale_y_continuous(limits = c(-2, 2)) +
  xlab("Hour") +
  ylab("Average Twitter Sentiment") +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 03:00:00"), y = -2, xend = as.POSIXct("2017-10-28 03:00:00"), yend = -0.5), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 03:00:00"), y = -0.35, label = "Murfreesboro: Police \n close town square"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 09:00:00"), y = 2, xend = as.POSIXct("2017-10-28 09:00:00"), yend = 1.6), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 09:00:00"), y = 1.5, label = "Shelbyville: Law enforcement \n arrives in riot gear"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 10:15:00"), y = -2, xend = as.POSIXct("2017-10-28 10:15:00"), yend = -1.6), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 10:15:00"), y = -1.5, label = "Shelbyville: First white nationalist \n and counter-protestors arrive"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 12:00:00"), y = 2, xend = as.POSIXct("2017-10-28 12:00:00"), yend = -0.65), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 12:00:00"), y = -0.75, label = "Shelbyville: 400 counter-protestors, \n 200 white nationalists on site"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 14:00:00"), y = 2, xend = as.POSIXct("2017-10-28 14:00:00"), yend = -0.2), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 15:30:00"), y = -0.3, label = "Shelbyville: White nationalists elect \n to move to Murfeesboro"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 16:00:00"), y = -2, xend = as.POSIXct("2017-10-28 16:00:00"), yend = -1.25), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 16:30:00"), y = -1.1, label = "Murfreesboro rally fizzles \n as white nationalist numbers dwindle"), size = 4, data = data.frame()) +
  ggtitle("Average Sentiment during Murfreesboro/Shelbyville Protest") +
  theme(legend.title = element_text(size = 15), legend.text = element_text(size = 10), plot.title = element_text(hjust = 0.5, size = 25), axis.text = element_text(size = 15), axis.title = element_text(size = 15)) 


# Big player networks over time

saturday_stars <- network_stars_tweets %>%
  filter(created < as.POSIXct("2017-10-28 17:30:00") & created > as.POSIXct("2017-10-28 02:00:00")) %>%
  group_by(day, minutes_15) %>%
  mutate(tweet_sentiment = mean(tweet_sentiment),
         n = n(),
         Positive = (tweet_sentiment > 0)) %>%
  filter(day == "Saturday") %>%
  ggplot(aes(x = round_qhr, y = tweet_sentiment)) +
  geom_point(aes(size = n, color = Positive), alpha = 0.75) +
  geom_smooth(color = "orange", linetype = "dotted", se = FALSE) +
  scale_x_datetime(labels = date_format("%H:%M"), breaks = date_breaks("4 hour")) +
  scale_y_continuous(limits = c(-3, 3)) +
  xlab("Hour") +
  ylab("Average Twitter Sentiment") +
  facet_wrap(~network, nrow = 3) +
  ggtitle("Average Sentiment during Murfreesboro/Shelbyville Protest by Network") +
  theme(legend.title = element_text(size = 15), legend.text = element_text(size = 10), plot.title = element_text(hjust = 0.5, size = 15), axis.text = element_text(size = 8), axis.title = element_text(size = 15)) 

  

# NRC sentiments for networks

tweets_nrc_sentiment <- tokens %>%
  inner_join(nrc_sentiments, by = "word") %>%
  mutate(day = "", 
         day = replace(day, str_sub(created, start = 9, end = 10) == "29", "Sunday"),
         day = replace(day, str_sub(created, start = 9, end = 10) == "28", "Saturday"),
         day = replace(day, str_sub(created, start = 9, end = 10) == "27", "Friday"),
         minutes_15 = round(60 * 24 * as.numeric(times(str_sub(created, start = -8, end = -1))), 0),
         minutes_15 = trunc(minutes_15/15),
         created = ymd_hms(created)) %>%
  select(created, id, screenName, retweetCount, sentiment, day, minutes_15, replyToSN) %>%
  filter(replyToSN %in% network_stars | screenName %in% network_stars) %>%
  mutate(network = ifelse(screenName %in% network_stars, screenName, NA),
         network = replace(network, replyToSN %in% network_stars, replyToSN))
## Warning in x[list] <- values: number of items to replace is not a multiple
## of replacement length
tweets_nrc_sentiment$round_qhr <- round_qhr <- as.POSIXct(round(as.double(tweets_nrc_sentiment$created)/(15*60))*(15*60), origin=(as.POSIXct('1970-01-01')))

# nrc count of sentiments over time

sentiment_nrc_time <- tweets_nrc_sentiment %>%
  group_by(day, minutes_15) %>%
  mutate(n_time = n()) %>%
  ungroup() %>%
  group_by(day, minutes_15, sentiment) %>%
  mutate(n_sentiment = n(), sentiment_share = n_sentiment/n_time) %>%
  ungroup() %>%
  mutate(sentiment_overall = ifelse(sentiment %in% c("fear", "anticipation", "surprise"), "Suspense", NA),
         sentiment_overall = replace(sentiment_overall, sentiment %in% c("positive", "joy", "trust"), "Positive"),
         sentiment_overall = replace(sentiment_overall, is.na(sentiment_overall), "Negative")) %>%
  distinct(sentiment, day, round_qhr, n_time, n_sentiment, sentiment_share, created, sentiment_overall)



#Plot of sentiment share over time

sentiment_share <- sentiment_nrc_time %>%
  filter(created < as.POSIXct("2017-10-28 17:30:00") & created > as.POSIXct("2017-10-28 02:00:00")) %>%
  filter(day == "Saturday") %>%
  ggplot(aes(x = round_qhr, y = sentiment_share, color = sentiment)) + 
  geom_smooth(se = FALSE) +
  facet_grid(~sentiment_overall)


#timeline of events

"source: http://www.tennessean.com/story/news/2017/10/28/white-lives-matter-rally-murfreesboro-tn-live-updates-shelbyville-tn-stream-video/804380001/"
## [1] "source: http://www.tennessean.com/story/news/2017/10/28/white-lives-matter-rally-murfreesboro-tn-live-updates-shelbyville-tn-stream-video/804380001/"
#nrc anticipation and anger over time

sentiment_share_limited <- sentiment_nrc_time %>%
  mutate(Sentiment = sentiment, n = n_sentiment) %>%
  filter(created < as.POSIXct("2017-10-28 17:30:00") & created > as.POSIXct("2017-10-28 02:00:00")) %>%
  filter(day == "Saturday" & sentiment %in% c("anticipation", "anger")) %>%
  ggplot(aes(x = round_qhr, y = sentiment_share)) + 
  geom_point(aes(size = n, color = Sentiment), alpha = 0.35) +
  geom_smooth(se = FALSE, aes(color = sentiment)) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 03:00:00"), y = 0.4, xend = as.POSIXct("2017-10-28 03:00:00"), yend = 0.215), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 03:00:00"), y = 0.20, label = "Murfreesboro: Police \n close town square"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 09:00:00"), y = 0, xend = as.POSIXct("2017-10-28 09:00:00"), yend = 0.135), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 09:00:00"), y = 0.15, label = "Shelbyville: Law enforcement \n arrives in riot gear"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 10:15:00"), y = 0.4, xend = as.POSIXct("2017-10-28 10:15:00"), yend = 0.285), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 10:15:00"), y = 0.3, label = "Shelbyville: First white nationalist \n and counter-protestors arrive"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 12:00:00"), y = 0, xend = as.POSIXct("2017-10-28 12:00:00"), yend = 0.07), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 12:00:00"), y = 0.085, label = "Shelbyville: 400 counter-protestors, \n 200 white nationalists on site"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 14:00:00"), y = 0.4, xend = as.POSIXct("2017-10-28 14:00:00"), yend = 0.27), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 14:20:00"), y = 0.255, label = "Shelbyville: White nationalists elect \n to move to Murfeesboro"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 16:00:00"), y = 0, xend = as.POSIXct("2017-10-28 16:00:00"), yend = 0.035), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 16:30:00"), y = 0.05, label = "Murfreesboro rally fizzles \n as white nationalist numbers dwindle"), size = 4, data = data.frame()) +
  ggtitle("Sentiment Share during Murfreesboro/Shelbyville Protest") +
  theme(legend.title = element_text(size = 18), legend.text = element_text(size = 15), plot.title = element_text(hjust = 0.5, size = 25), axis.text = element_text(size = 15), axis.title = element_text(size = 15)) +
  scale_x_datetime(labels = date_format("%H:%M"), breaks = date_breaks("2 hour")) +
  scale_y_continuous(limits = c(0, 0.4)) +
  xlab("Hour") +
  ylab("Share of Total Sentiments Expressed")


#limitations

#assignment of afinn sentiments are somewhat arbitrary and may not have the context for specific events

afinn_sentiments %>%
  filter(word %in% c("absentee", "aboard", "apocalyptic", "cheer", "cheat", "charm", "damn", "rejoice", "prick", "thrilled")) %>%
  arrange(score)
## # A tibble: 10 x 2
##           word score
##          <chr> <int>
##  1       prick    -5
##  2        damn    -4
##  3       cheat    -3
##  4 apocalyptic    -2
##  5    absentee    -1
##  6      aboard     1
##  7       cheer     2
##  8       charm     3
##  9     rejoice     4
## 10    thrilled     5
afinn_sentiments %>%
  summarise(sum(word %in% c("confederate", "nazi", "kkk")))
## # A tibble: 1 x 1
##   `sum(word %in% c("confederate", "nazi", "kkk"))`
##                                              <int>
## 1                                                0
# Some tweets cannot be assigned a sentiment (improper spelling, slang terms, etc.)

tweet_text %>%
  anti_join(tweets_afinn_sentiment, by = c("created", "screenName")) %>%
  mutate(key_word = str_detect(text, paste(c("nazi", "kkk", "supremacy"),collapse = '|'))) %>%
  filter(key_word == TRUE) %>%
  select(screenName, text) %>%
  head(10)
##         screenName
## 1  wearewatchingtn
## 2        RyanGava_
## 3        RyanGava_
## 4        RyanGava_
## 5     connectedage
## 6          pjoycli
## 7   OliviaJWeibert
## 8          keya267
## 9    SpotdogWright
## 10       damonakin
##                                                                                      text
## 1                nazis got run murfreesboro came brentwood beat white woman black man are
## 2                                                             lkkkklkkkkjjbkajob chorando
## 3                                                            kkkkkkkkkkk nao uso facetune
## 4                                                                        kkkkkkkkkkkkkkkk
## 5                nazis got run murfreesboro came brentwood beat white woman black man are
## 6                                                                               nazi hats
## 7                                     hundreds us handful 10 most klan nazis guts show up
## 8  pos nazis matching murfreesboro mtsu taking pictures say hi family friends neighbors b
## 9                                                           kkk blm good groups end story
## 10               nazis got run murfreesboro came brentwood beat white woman black man are
# sentiment analysis can't correct spelling

tweet_text %>%
  filter(screenName == "justasking3time") %>%
  select(text) %>%
  head(10)
##                                                                    text
## 1                take care situation right now 2017 middle indicting pe
## 2                      omg saying photo taken 2016 campaign clinton col
## 3              well one reason might consider state gave corrections pr
## 4                                   maybe talk gold silver investing th
## 5                                               you message me for what
## 6  terry tweet hillary me think described hillary must missed something
## 7                                                hey state live in mayb
## 8                             u think trump gop will keep exactly way f
## 9                                                    friends families t
## 10                                                    ok realize expefi
# nrc sentiments lack context as well

nrc_sentiments %>%
  filter(word %in% c("confederate", "nationalist", "trump", "president")) %>%
  kable()
word sentiment
confederate positive
confederate trust
president positive
president trust
trump surprise
# Friday average sentiment over time

tweets_afinn_sentiment %>%
  filter(created < as.POSIXct("2017-10-27 17:30:00") & created > as.POSIXct("2017-10-27 02:00:00")) %>%
  group_by(day, minutes_15) %>%
  mutate(tweet_sentiment = mean(tweet_sentiment),
         n = n(),
         Positive = (tweet_sentiment > 0)) %>%
  filter(day == "Friday") %>%
  ggplot(aes(x = round_qhr, y = tweet_sentiment)) +
  geom_point(aes(size = n, color = Positive), alpha = 0.75) +
  geom_smooth(color = "grey", linetype = "dotted") +
  scale_x_datetime(labels = date_format("%H:%M"), breaks = date_breaks("2 hour")) +
  scale_y_continuous(limits = c(-2, 2)) +
  xlab("Hour") +
  ylab("Average Twitter Sentiment")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 171 rows containing non-finite values (stat_smooth).
## Warning: Removed 171 rows containing missing values (geom_point).

#Sunday average sentiment over time

tweets_afinn_sentiment %>%
  filter(created < as.POSIXct("2017-10-29 17:30:00") & created > as.POSIXct("2017-10-29 02:00:00")) %>%
  group_by(day, minutes_15) %>%
  mutate(tweet_sentiment = mean(tweet_sentiment),
         n = n(),
         Positive = (tweet_sentiment > 0)) %>%
  filter(day == "Sunday") %>%
  ggplot(aes(x = round_qhr, y = tweet_sentiment)) +
  geom_point(aes(size = n, color = Positive), alpha = 0.75) +
  geom_smooth(color = "grey", linetype = "dotted") +
  scale_x_datetime(labels = date_format("%H:%M"), breaks = date_breaks("2 hour")) +
  scale_y_continuous(limits = c(-2, 2)) +
  xlab("Hour") +
  ylab("Average Twitter Sentiment")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Trumps network

trump <- network_stars_tweets %>%
  filter(created < as.POSIXct("2017-10-28 17:30:00") & created > as.POSIXct("2017-10-28 02:00:00")) %>%
  filter(network == "realDonaldTrump") %>%
  group_by(day, minutes_15) %>%
  mutate(tweet_sentiment = mean(tweet_sentiment),
         n = n(),
         Positive = (tweet_sentiment > 0)) %>%
  filter(day == "Saturday") %>%
  ggplot(aes(x = round_qhr, y = tweet_sentiment)) +
  geom_point(aes(size = n, color = Positive), alpha = 0.75) +
  geom_smooth(color = "orange", linetype = "dotted", se = FALSE) +
  scale_x_datetime(labels = date_format("%H:%M"), breaks = date_breaks("4 hour")) +
  xlab("Hour") +
  ylab("Average Twitter Sentiment") +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 03:00:00"), y = -4, xend = as.POSIXct("2017-10-28 03:00:00"), yend = 0), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 03:00:00"), y = 0.1, label = "Murfreesboro: Police \n close town square"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 09:00:00"), y = 4, xend = as.POSIXct("2017-10-28 09:00:00"), yend = -0.4), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 09:00:00"), y = -0.5, label = "Shelbyville: Law enforcement \n arrives in riot gear"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 10:15:00"), y = -4, xend = as.POSIXct("2017-10-28 10:15:00"), yend = -2), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 10:15:00"), y = -1.9, label = "Shelbyville: First white nationalist \n and counter-protestors arrive"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 12:00:00"), y = 4, xend = as.POSIXct("2017-10-28 12:00:00"), yend = 0.6), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 11:35:00"), y = 0.5, label = "Shelbyville: 400 counter-protestors, \n 200 white nationalists on site"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 14:00:00"), y = 4, xend = as.POSIXct("2017-10-28 14:00:00"), yend = -0.1), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 15:30:00"), y = -0.2, label = "Shelbyville: White nationalists elect \n to move to Murfeesboro"), size = 4, data = data.frame()) +
  geom_segment(mapping = aes(x = as.POSIXct("2017-10-28 16:00:00"), y = -4, xend = as.POSIXct("2017-10-28 16:00:00"), yend = -2.5), size = 0.2,
               linetype = "dashed", data = data.frame()) +
  geom_text(mapping = aes(x = as.POSIXct("2017-10-28 15:30:00"), y = -2.35, label = "Murfreesboro rally fizzles \n as white nationalist numbers dwindle"), size = 4, data = data.frame()) +
  ggtitle("Donald Trump's Network during Murfreesboro/Shelbyville Protest") +
  theme(legend.title = element_text(size = 12), legend.text = element_text(size = 9), plot.title = element_text(hjust = 0.5, size = 25), axis.text = element_text(size = 15), axis.title = element_text(size = 15))


# Plots

saturday
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 347 rows containing non-finite values (stat_smooth).
## Warning: Removed 347 rows containing missing values (geom_point).

saturday_stars
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 1.5092e+009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 918
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 7.3875e+006
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning: Computation failed in `stat_smooth()`:
## NA/NaN/Inf in foreign function call (arg 5)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 1.5092e+009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 904.5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at 1.5092e+009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 20.25
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 20.25
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning: Computation failed in `stat_smooth()`:
## NA/NaN/Inf in foreign function call (arg 5)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 1.5092e+009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2812.5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 7.29e+006
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_smooth).

trump
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

sentiment_share_limited
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

#all the tools in the toolshed
require(igraph)
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(dplyr)
library(tidytext)
library(tidyverse)
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(ggplot2)
library(network)
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
## 
## Attaching package: 'network'
## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges,
##     delete.vertices, get.edge.attribute, get.edges,
##     get.vertex.attribute, is.bipartite, is.directed,
##     list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
library(RColorBrewer)
library(intergraph)
library(ggnet)
library(svgPanZoom)    
library(DT) 
library(ggrepel)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:network':
## 
##     is.discrete
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(ggnetwork)
library(ggiraph)
library(stringr)
library(sna)
## Loading required package: statnet.common
## 
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
## 
##     order
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
## 
## Attaching package: 'sna'
## The following objects are masked from 'package:igraph':
## 
##     betweenness, bonpow, closeness, components, degree,
##     dyad.census, evcent, hierarchy, is.connected, neighborhood,
##     triad.census
#reads in the data and removes unnecessary variables
url <- "https://raw.githubusercontent.com/twitter260/twitter260.github.io/master/our_code/text_identities.csv"
tweets <- read.csv(url(url), stringsAsFactors = FALSE)
tweets$truncated <- NULL
tweets$replyToSID <- NULL
tweets$replyToUID <- NULL
tweets$longitude <- NULL
tweets$latitude <- NULL

# Sentiments (-5,-4,...,4,5)
afinn_sentiments <- get_sentiments("afinn")

# separate each word
tokens <- unnest_tokens(tweets, word, text, to_lower = TRUE) %>%
  anti_join(stop_words)
## Joining, by = "word"
# Assign sentiment to each word
tokens_afinn_sentiment <- tokens %>%
  inner_join(afinn_sentiments) %>%
  group_by(id, created) %>%
  mutate(tweet_sentiment = mean(score)) 
## Joining, by = "word"
#only take unique combinations of participants
dat <- unique(tokens_afinn_sentiment[c("X", "screenName", "replyToSN", "created", "retweetCount", "favoriteCount", "tweet_sentiment")])



#those who tweet
tweeters <- unique(dat$screenName)

#those who are tweeted at
ind <- !is.na(dat$replyToSN)
responders <- unique(dat$replyToSN[ind])

####weighted network
#build the the adjacency matrix
ind <- which(!is.na(dat$replyToSN))
dat.matrix <- as.matrix(dat[ind,c("replyToSN","screenName","tweet_sentiment")]) 
g.w <- graph.edgelist(dat.matrix[,1:2], directed = FALSE)

#scale the vertices to degree size
g.w <- igraph::simplify(g.w)
deg <- igraph::degree(g.w, mode = 'all')
V(g.w)$size <- sqrt(deg)*3

#add the weights
weights <- as.numeric(dat.matrix[,3])
weights[weights == 0] <- 10^-6
E(g.w)$weight <- weights
## Warning in eattrs[[name]][index] <- value: number of items to replace is
## not a multiple of replacement length

Visualize the overall graph!

library(ggnetwork)
#reprocess
g.w <- igraph::simplify(g.w)
gw <- g.w
E(gw)$weight <- 1
gw <- igraph::simplify(gw, edge.attr.comb="sum")
V(gw)$weight <- igraph::degree(g.w, mode = 'all')
#redo
set.seed(1492)
Dat <- ggnetwork(gw, layout="fruchtermanreingold", arrow.gap=0, cell.jitter=0)
Dat$x <- as.vector(Dat$x)
Dat$y <- as.vector(Dat$y)
Dat$xend <- as.vector(Dat$xend)
Dat$yend <- as.vector(Dat$yend)
Dat$size <- as.vector(Dat$size)

#makes the plot of the overall network
ggplot(Dat) +
  geom_edges(aes(x=x, y=y, xend = xend, yend = yend),
             color="grey50", curvature=0.1, size=0.15, alpha=1/2) +
  geom_nodes(data=Dat,
             aes(x=x, y=y, xend=xend, yend=yend, size=sqrt(size)),
             alpha=1/3) +
  theme_blank() +
  theme(legend.position="none") -> gg
## Warning: Ignoring unknown aesthetics: xend, yend
gg

Summary statistics of the overall set.

g.w <- igraph::simplify(g.w)
#summary about the degrees of this graph 
PDF <- degree.distribution(g.w)
CDF <- degree.distribution(g.w, cumulative = T)

#Degree distribution of the graph
plot(x = 0:max(igraph::degree(g.w)), y=1-CDF, pch=19, cex=0.5, col="orange", 
     xlab="Degree", ylab="Frequency", type = 'o', main = "Degree Frequency")
points(PDF, type = 'o', col = 'blue', cex = 0.5)
legend(x = 57.5, y = 0.15, legend = c('Cumulative Frequency', 'Frequency'), col = c('orange','blue'), pch = c(19,1), lty = c(1,1), cex = 0.5, bty = 'n')

#Distribution of weights of the graph
weights <- data.frame(weights)
ggplot(weights) + geom_histogram(aes(weights), fill = 'red', col = 'black', binwidth = 0.5) + xlab("Weights") + ylab("Weight Counts") + ggtitle("Weights of the Overall Network") + 
   theme(plot.title = element_text(hjust = 0.5))

#cluster size
clusters <- data.frame(clusters = clusters(g.w)$csize)
ggplot(clusters) + geom_histogram(aes(clusters), fill = 'purple', col = 'orange', bins = 20) + scale_y_log10() + scale_x_log10() + xlab("Cluster Size (Log-10)") + ylab("Counts (Log-10)")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 10 rows containing missing values (geom_bar).

Filter the overall network

V(g.w)$comp <- igraph::components(g.w)$membership
#isolate 15 largest componets
comps <- as.numeric(names(sort(table(V(g.w)$comp), decreasing = T)[1:15]))

#filtering
ind <- which(V(g.w)$comp %in% comps)
main <- induced_subgraph(g.w,ind)
sum(table(V(main)$comp)[1])
## [1] 1648
sum(table(V(main)$comp)[2:15])
## [1] 287
ind <- which(V(g.w)$comp %in% comps[1])
LCC <- induced_subgraph(g.w,ind)
weights.LCC <- E(LCC)$weight
#distribution of weights of LCC
ggplot(data.frame(weights.LCC)) + geom_histogram(aes(weights.LCC), fill = 'red', col = 'black', binwidth = 0.5) +  xlab("Weights") + ylab("Weight Counts") + ggtitle("Weights of the Largest Connected Component") + theme(plot.title = element_text(hjust = 0.5))

#more filtering to examine the components
positive <- delete_edges(main, E(main)[weight <= 10^-6])
negative <- delete_edges(main, E(main)[weight > 10^-6])
par(mfrow = c(2,2))
plot(main, vertex.label = NA, layout = layout_with_mds(main), main = "Fifteen Largest Connected Components")
plot(positive, vertex.label = NA, layout = layout_with_mds(positive), main = "Positive Connections\nin\n Largest Connected Components")
plot(negative, vertex.label = NA, layout = layout_with_mds(negative), main = "Negative Connections\nin\n Largest Connected Components")
plot(LCC, vertex.label = NA, layout = layout_with_mds(LCC), main = "Largest Connected Component")

Summary of the largest component

plot(1- degree.distribution(LCC, cumulative = T), type = 'o', col = 'orange', xlab = "Degrees", ylab = "Frequency", pch = 19, main = "Degree Frequency")
lines(degree.distribution(LCC, cumulative = F), type = 'o', col = 'blue')
legend(x = 57.5, y = 0.15, legend = c('Cumulative Frequency', 'Frequency'), col = c('orange','blue'), pch = c(19,1), lty = c(1,1), cex = 0.5, bty = 'n')

sum(count_triangles(LCC))
## [1] 6
cliques(LCC, min = 3)
## [[1]]
## + 3/1648 vertices, named, from 4f06ed1:
## [1] ChadCaldwell24 _corymiller    Kyle_Hardin_VU
## 
## [[2]]
## + 3/1648 vertices, named, from 4f06ed1:
## [1] BlakeTunechi   TheReedEmerson TristanLogue

Development of graph over time

net <- network(intergraph::asNetwork(LCC),directed = F)

# Get a data.frame of edges and add an arbitrary time unit
dat <- as.data.frame(igraph::get.edgelist(LCC), stringsAsFactors = F) #get dataframe of edges
colnames(dat)<-c("from", "to") #add column names
dat$time <- round(seq.int(1,8,length.out=nrow(dat)),0) #add a time variable

# Convert df to a matrix of when node present or absent
tmp = data.frame(nodeid = c(dat$from,dat$to), time=dat$time) %>% group_by(nodeid) %>% 
      filter(time==min(time)) %>% unique %>% arrange(nodeid)

out <- sapply(tmp$time, function(i) c(rep(0, i-1), rep(1,8-i+1)))
out[out==0]<-NA



# Define vertex attribute activation as 1 or NA:
net %v% "t1" = out[1,]
net %v% "t2" = out[2,]
net %v% "t3" = out[3,]
net %v% "t4" = out[4,]
net %v% "t5" = out[5,]
net %v% "t6" = out[6,]
net %v% "t7" = out[7,]
net %v% "t8" = out[8,]


#for color
mycols <- rev(brewer.pal(9, "Greens")[-1]) #remove really overly light color

# Set up the initial layout
x = gplot.layout.fruchtermanreingold(net, NULL) 
net %v% "x" = x[, 1]
net %v% "y" = x[, 2]

# Create ggnet2 plots removing inactive nodes and setting initial layout
t1 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t1")
## na.rm removed 1509 nodes out of 1648
t2 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t2")
## na.rm removed 1264 nodes out of 1648
t3 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t3")
## na.rm removed 1044 nodes out of 1648
t4 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t4")
## na.rm removed 816 nodes out of 1648
t5 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t5")
## na.rm removed 596 nodes out of 1648
t6 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t6")
## na.rm removed 362 nodes out of 1648
t7 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t7")
## na.rm removed 126 nodes out of 1648
t8 = ggnet2(net, mode = c("x", "y"), size = 0,  node.color = mycols[tmp$time], na.rm = "t8")
## na.rm removed 0 nodes out of 1648
# Set up some plot features
b1 = theme(panel.background = element_rect(color = "grey50"),
           plot.title = element_text(size=rel(2.1)))
b2 = geom_point(aes(color = color), size = 1, color = "white")
b3 =  geom_point(aes(color = color), size = 1, alpha = 0.4)
b4 =  geom_point(aes(color = color), size = 1) 
b5 =  guides(color = FALSE)
y1 = scale_y_continuous(limits = range(x[, 2] * 1.1), breaks = NULL)
x1 = scale_x_continuous(limits = range(x[, 1] * 1.1), breaks = NULL)

# show each temporal network
gridExtra::grid.arrange(t1 + x1 + y1  + ggtitle("t = 1") + b1 + b2 + b3 + b4 + b5,
                        t2 + x1 + y1  + ggtitle("t = 2") + b1 + b2 + b3 + b4 + b5,
                        t3 + x1 + y1  + ggtitle("t = 3") + b1 + b2 + b3 + b4 + b5,
                        t4 + x1 + y1  + ggtitle("t = 4") + b1 + b2 + b3 + b4 + b5,
                        t5 + x1 + y1  + ggtitle("t = 5") + b1 + b2 + b3 + b4 + b5,
                        t6 + x1 + y1  + ggtitle("t = 6") + b1 + b2 + b3 + b4 + b5,
                        t7 + x1 + y1  + ggtitle("t = 7") + b1 + b2 + b3 + b4 + b5,
                        t8 + x1 + y1  + ggtitle("t = 8") + b1 + b2 + b3 + b4 + b5,
                        nrow = 2)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

Interactive plot of hubscore and centrality

#name of the vertices
handle <- V(LCC)$name
#Centrality of vertices
V(LCC)$size <- igraph::degree(LCC, mode = 'all')
Centrality <- V(LCC)$size
#hubscore of vertices
V(LCC)$power <- hub.score(LCC)$vector
hubscore <- V(LCC)$power

df <- data.frame(handle = handle, centrality = Centrality, hub_score = hubscore, authority_score = authority.score(LCC)$vector)

gg_point_0 <- ggplot(df, aes(x = centrality, y = hub_score, tooltip = handle, data_id = handle) ) + 
  geom_point_interactive(size=1) + theme_bw() + theme(text = element_text(size = rel(5.5))) +ylab("Hub Score")+
  xlab("Degree Centrality")

tooltip_css <- "background-opacity:0;font-size: 200%;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"

gi <- ggiraph(code = {print(gg_point_0)}, tooltip_offx = 10, tooltip_offy = -30,tooltip_extra_css = tooltip_css, tooltip_opacity = .75,hover_css = "stroke:red;fill:red;stroke-width:7pt" )


saveWidget(gi, file = "gi.html",selfcontained= T)

gg_point_1 <- ggplot(df, aes(x = centrality, y = authority_score, tooltip = handle, data_id = handle) ) + 
  geom_point_interactive(size=1) + theme_bw() + theme(text = element_text(size = rel(5.5))) +ylab("Authority Score")+
  xlab("Degree Centrality")

tooltip_css <- "background-opacity:0;font-size: 200%;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"

ga <- ggiraph(code = {print(gg_point_0)}, tooltip_offx = 10, tooltip_offy = -30,tooltip_extra_css = tooltip_css, tooltip_opacity = .75,hover_css = "stroke:red;fill:red;stroke-width:7pt" )
ga

Pretty graph based on high centrality nodes

t <- datatable(arrange(data_frame(Person=V(LCC)$name, Centrality=V(LCC)$size), desc(Centrality)))
t
colnames(dat) <- c("From:", "To:", "Times")
tf <- datatable(arrange(dat, desc(Times)))
tf
g <- LCC
g <- igraph::simplify(g, edge.attr.comb="sum")

#redo
set.seed(1492)
Dat <- ggnetwork(g, layout="fruchtermanreingold", arrow.gap=0, cell.jitter=0)
Dat$x <- as.vector(Dat$x)
Dat$y <- as.vector(Dat$y)
Dat$xend <- as.vector(Dat$xend)
Dat$yend <- as.vector(Dat$yend)
Dat$size <- as.vector(Dat$size)
ggplot() +
  geom_edges(data=Dat, 
             aes(x=x, y=y, xend=xend, yend=yend),
             color="grey50", curvature=0.1, size=0.15, alpha=1/2) +
  geom_nodes(data=Dat,
             aes(x=x, y=y, xend=xend, yend=yend, size=sqrt(size)),
             alpha=1/3) +  
  geom_label_repel(data=unique(Dat[Dat$size>15,c(1,2,6,7)]),
                   aes(x=x, y=y, label=vertex.names), 
                   size=2, color="#8856a7") +
  theme_blank() +
  theme(legend.position="none") -> gg
## Warning: Ignoring unknown aesthetics: xend, yend
gg

Content from the largest characters

top <- unique(Dat[Dat$size>15,6])
tweetstops <- read.csv("https://raw.githubusercontent.com/twitter260/twitter260.github.io/master/our_code/murfreesboro102817.csv", stringsAsFactors = F)
tweetstops <- tweetstops %>% filter(replyToSN %in% top | screenName %in% top) 
replace_reg <- "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https"
for(i in 1:length(tweetstops[,1])){
  tweet <- tweetstops[i,1]
  tweet <- sub("rt ", "", tweet) #remove retweet 
  tweet <- gsub("@\\w+", "", tweet) # remove at(@)
  tweet <- gsub("&lt;3","",tweet) #removes ASCII hearts <3 
  tweet <- gsub("&lt;|&gt;|&le;|&ge;","",tweet) #removes html <, >, <=, >=
  tweet <- str_replace_all(tweet ,replace_reg, "")  # remove links https 
  tweet <- gsub("[ |\t]{2,}", " ", tweet) # remove tabs 
  tweet <- iconv(tweet, "latin1", "ASCII", sub="") #makes emojis readable 
  tweet <- gsub("<[^>]+>", "", tweet) #removes remaining text from emojis
  tweet <- gsub("[\r|\n|\t|\v|\f]", "", tweet) #removes form feeds tabs etc
  tweet <- gsub("^ ", "", tweet)  # remove blank spaces at the beginning
  tweet <- gsub(" $", "", tweet) # remove blank spaces at the end
  tweetstops[i,1] <- capitalize(tweet)
}
topTweets <- tweetstops
topTweets <- topTweets[,c(1,11,4)]
TT <- datatable(topTweets)
TT

Graph based on high hub score

g <- LCC
g <- igraph::simplify(g, edge.attr.comb="sum")
Dat <- ggnetwork(g, layout="fruchtermanreingold", arrow.gap=0, cell.jitter=0)
Dat$x <- as.vector(Dat$x)
Dat$y <- as.vector(Dat$y)
Dat$xend <- as.vector(Dat$xend)
Dat$yend <- as.vector(Dat$yend)
Dat$size <- as.vector(Dat$size)
ggplot() +
  geom_edges(data=Dat, 
             aes(x=x, y=y, xend=xend, yend=yend),
             color="grey50", curvature=0.1, size=0.1, alpha=1/2) +
  geom_nodes(data=Dat,
             aes(x=x, y=y, xend=xend, yend=yend, size=sqrt(size)),
             alpha=1/3) +  
  geom_label_repel(data=unique(Dat[Dat$power>0.10,c(1,2,6,7)]),
                   aes(x=x, y=y, label=vertex.names), 
                   size=2, color="#8856a7") + 
  theme_blank() +
  theme(legend.position="none") -> gg
## Warning: Ignoring unknown aesthetics: xend, yend
gg

library(visNetwork)
comps <- as.numeric(names(sort(table(V(g.w)$comp), decreasing = T)[3]))
ind <- which(V(g.w)$comp %in% comps)
three <- induced_subgraph(g.w,ind)

#interactive network
set.seed(1492)
nodes <- data.frame(id = as.character(V(three)$name))
E(three)$weight <- abs(E(three)$weight)
nodes$group <- cluster_fast_greedy(three)$membership
V(three)$size <- igraph::degree(three)
nodes$font.size <- 20
nodes$size <- V(three)$size
edges <- data.frame(get.edgelist(three))
colnames(edges)<-c("from","to")

# Plot with defaut layout
vN <- visNetwork(nodes, edges, height = "600px") %>%
  visIgraphLayout() %>%
  visNodes(size = nodes$size*3) %>%
  visOptions(selectedBy = "group", 
             highlightNearest = TRUE, 
             nodesIdSelection = TRUE) %>%  
  visInteraction(keyboard = TRUE,
                 dragNodes = T, 
                 dragView = T, 
                 zoomView = T)
vN